/*
Creates figures comparing treated and comparison establishments
		// input: estabchars_wide, estabtreat_forRAIS_deid, estab_union_act
		// output (figures, .tif): size_byTC, industry_alt_byTC, region_byTC, women_emp_sh_byTC
*/

* Start a fresh log. Both commands are captured, so the script carries on
* (without a log) if no log is open or the log file cannot be created.
cap log close
cap log using "$logs/sample_descriptives_log", replace

* Load data with establishment characteristics
use "$files/estabchars_wide.dta", clear 

* Restrict to establishments in our amenities sample and attach treatment status.
* The lookup table is built inside preserve/restore so the establishment
* characteristics loaded above stay in memory untouched.
preserve
	// Employers observed in the union-activity data between 2012 and 2017
	use "$files/estab_union_act.dta", clear
	keep if inrange(year, 2012, 2017)
	keep employer_id treat
	duplicates drop

	// Turn the formatted employer id (cnpj) into the numeric establishment id:
	// strip the ".", "/" and "-" separators, then convert to a number.
	// No zero-padding is needed: leading zeros vanish once the string is numeric.
	gen cnpj = subinstr(subinstr(subinstr(employer_id, ".", "", .), "/", "", .), "-", "", .)
	destring cnpj, gen(fakeid_estab) 
	format  fakeid_estab %14.0f

	// One row per establishment id in the amenities sample
	keep fakeid_estab
	duplicates drop 
	tempfile ams 
	save `ams'

	// Treatment status, restricted to establishments that are also in the amenities sample
	use "$files/estabtreat_forRAIS_deid.dta", clear 
	keep fakeid_estab treat
	merge 1:1 fakeid_estab using `ams', nogen keep(3) 	

	// Check that treat is non-missing (its presence is already enforced by -keep- above).
	// This is a warning only: observations with missing treat are not dropped here,
	// but they are excluded from every figure because the plots select on treat==0/1.
	quietly count if missing(treat)
	local n_treat_missing = r(N)
	if `n_treat_missing' > 0 {
		display as error "Warning: `n_treat_missing' establishment(s) in the merged sample have missing treat"
	}

	tempfile samples 
	save `samples'
restore

* Keep only establishments that appear in both the characteristics file and the sample
merge 1:1 fakeid_estab using `samples', nogen keep(3) 	


* Comparison of industries 
* The leading digits of the modal CNAE subclass code give the industry division
* (assumes a 7-digit subclass code, so that dividing by 1e5 leaves two digits -- TODO confirm).
gen cnae = floor(cnaesubcl_mode/1e5)

* Group divisions into 18 broad industries. cnae is an integer after floor(), so the
* inclusive ranges below are equivalent to half-open intervals [lower, next lower).
* Anything outside 1-99 (including missing) is left unclassified.
recode cnae ///
	(1/4   = 1)  /// farming and fishing
	(5/9   = 2)  /// extractive industries
	(10/34 = 3)  /// manufacturing
	(35/40 = 4)  /// electricity/gas/utilities
	(41/44 = 5)  /// construction
	(45/48 = 6)  /// trade
	(49/54 = 7)  /// transportation and warehousing
	(55/57 = 8)  /// accommodation and food
	(58/63 = 9)  /// information and communication
	(64/67 = 10) /// banking and finance
	(68    = 11) /// real estate
	(69/76 = 12) /// professional activities
	(77/83 = 13) /// administrative activities
	(84    = 14) /// public administration
	(85    = 15) /// education
	(86/89 = 16) /// health
	(90/93 = 17) /// culture and sports
	(94/99 = 18) /// other services and organizations
	(else  = .), gen(industry)
drop cnae

label define ind 1 "Farm" 2 "Extract" 3 "Manuf" 4 "Utilities" 5 "Constr" 6 "Trade" ///
			7 "Transp" 8 "Hosp" 9 "Communic" 10 "Banking" 11 "Real estate" 12 "Profess" ///
			13 "Admin" 14 "Public" 15 "Educ" 16 "Health" 17 "Culture" 18 "Other", replace
label values industry ind

* Overlaid percent histograms, treated vs. control.
* Ticks are requested explicitly at 1(1)18: the "#18" rule only asks for roughly 18
* "nice" values, which does not guarantee one value label per industry.
twoway (histogram industry if treat==1, percent color(sourapple1%30) discrete) ///        
		(histogram industry if treat==0, percent color(sourapple4%30) discrete), ///   
		legend(order(1 "Treated" 2 "Control" ) size(medium) rows(2) region(lstyle(none) color(none)) position(1) ring(0)) ///
		xtitle("") xla(1(1)18,valuelabel labsize(medium) angle(45)) ///
		ytitle("Percent", size(medlarge)) yla(,labsize(medium)) ///
		plotregion(fcolor(white)) graphregion(fcolor(white)) graphregion(color(white))
graph export "$figures/industry_alt_byTC.tif", replace


* Comparison of region
* Numeric version of the modal state (not used by the figure below)
encode state_mode, gen(state_mode_num)

* Map each state abbreviation to its macro-region; states not listed stay blank
gen region = ""
replace region = "North"     if inlist(state_mode, "AM", "AC", "RO", "RR", "AP", "PA", "TO")
replace region = "Northeast" if inlist(state_mode, "MA", "PI", "CE", "RN", "PB", "PE", "AL", "SE", "BA")
replace region = "Central"   if inlist(state_mode, "MT", "GO", "DF", "MS")
replace region = "Southeast" if inlist(state_mode, "MG", "ES", "SP", "RJ")
replace region = "South"     if inlist(state_mode, "PR", "SC", "RS")

* encode numbers the regions 1-5 in alphabetical order and attaches the names as value labels
encode region, gen(region_num)

* Overlaid percent histograms, treated vs. control.
* Ticks are requested explicitly at 1(1)5 so that every region is labelled; the "#5"
* rule only asks for approximately five "nice" values.
twoway (histogram region_num if treat==1, percent color(sourapple1%30)  discrete) ///        
		(histogram region_num if treat==0, percent color(sourapple4%30) discrete), ///   
		legend(order(1 "Treated" 2 "Control" ) size(medium) rows(2) region(lstyle(none) color(none)) position(11) ring(0)) ///
		xtitle("") xla(1(1)5,valuelabel labsize(medium)) ///
		ytitle("Percent", size(medlarge)) yla(,labsize(medium)) ///
		plotregion(fcolor(white)) graphregion(fcolor(white)) graphregion(color(white))
graph export "$figures/region_byTC.tif", replace


* Comparison of size
* Quick look at the employment variables and their missingness
sum tot_emp*
mdesc tot_emp*

* Establishment size is measured by 2014 total employment, rounded to a whole number
gen size_avg = round(tot_employment2014, 1)

* Size classes. missing() is used instead of comparisons with "." so that extended
* missing values (.a, .b, ...) are treated as missing rather than as very large numbers,
* and establishments that round to zero employees are placed in "None" instead of
* silently falling out of the figure.
gen employer_size = 1 if missing(size_avg) | size_avg == 0
replace employer_size = 2 if inrange(size_avg, 1, 4)
replace employer_size = 3 if inrange(size_avg, 5, 9)
replace employer_size = 4 if inrange(size_avg, 10, 19)
replace employer_size = 5 if inrange(size_avg, 20, 49)
replace employer_size = 6 if inrange(size_avg, 50, 99)
replace employer_size = 7 if inrange(size_avg, 100, 249)
replace employer_size = 8 if inrange(size_avg, 250, 499)
replace employer_size = 9 if inrange(size_avg, 500, 999)
replace employer_size = 10 if size_avg >= 1000 & !missing(size_avg)
label define size 1 "None" 2 "1-4" 3 "5-9" 4 "10-19" 5 "20-49" ///
6 "50-99" 7 "100-249" 8 "250-499" 9 "500-999" 10 "+1000", replace
label values employer_size size

* Overlaid percent histograms, treated vs. control.
* Ticks are requested explicitly at 1(1)10 so that every size class is labelled; the
* "#10" rule only asks for approximately ten "nice" values.
twoway (histogram employer_size if treat==1, percent color(sourapple1%30) discrete) ///        
		(histogram employer_size if treat==0, percent color(sourapple4%30) discrete), ///   
		legend(order(1 "Treated" 2 "Control" ) size(medium) rows(2) region(lstyle(none) color(none)) position(1) ring(0)) ///
		xtitle("") xla(1(1)10,valuelabel labsize(medium) angle(45)) ///
		ytitle("Percent", size(medlarge)) yla(,labsize(medium)) ///
		plotregion(fcolor(white)) graphregion(fcolor(white)) graphregion(color(white))
graph export "$figures/size_byTC.tif", replace


* Comparison of % female
* Female employment share for each year 2011-2017
forvalues y = 2011/2017 {
	// Drop any pre-existing share for this year so that -gen- below cannot fail
	cap drop sharefemale_`y'
	// Missing female employment with known total employment is treated as zero women
	replace tot_fememployment`y' = 0 if missing(tot_fememployment`y') & !missing(tot_employment`y')
	// Share is missing when total employment is missing or zero
	gen sharefemale_`y' = tot_fememployment`y'/tot_employment`y'
}
sum sharefemale_*
mdesc sharefemale_*

* The figure uses the 2014 share
gen sharewomen = sharefemale_2014

* Overlaid percent histograms (50 bins), treated vs. control
twoway (histogram sharewomen if treat==1, percent color(sourapple1%30) bin(50)) ///        
		(histogram sharewomen if treat==0, percent color(sourapple4%30)  bin(50)), ///   
		legend(order(1 "Treated" 2 "Control" ) size(medium) rows(2) region(lstyle(none) color(none)) position(11) ring(0)) ///
		xtitle("") xla(#10, format(%02.1f) labsize(medium)) ///
		ytitle("Percent", size(medlarge)) yla(,labsize(medium)) ///
		plotregion(fcolor(white)) graphregion(fcolor(white)) graphregion(color(white))
graph export "$figures/women_emp_sh_byTC.tif", replace


* Captured because the log is opened with -cap log using-: if that open failed,
* no log is open here and an uncaptured -log close- would end the script with an error.
cap log close
